home *** CD-ROM | disk | FTP | other *** search
- /*
- MSWord2Text
- EvenMore FileIO plugin
- Author: Chris Perver
- Copyright (c) 2001
- */
-
-
- -> Compiler option. NOTE(review): REG=5 presumably lets the compiler keep up
- -> to five locals per procedure in registers - confirm against the EC docs.
- OPT REG=5
-
- -> Build this file as a library named 'msword2text.plugin'; the procedures
- -> listed after IS are the entry points it exports.
- LIBRARY 'msword2text.plugin',1,1,'MSWord 1.31' IS em_main, em_end, em_info, em_pluginid, em_begin, em_format
-
- -> 'tools/ctype' supplies isspace(); '*epo' supplies the em_pluginobj object.
- MODULE 'tools/ctype', '*epo'
-
- -> mem2    - output buffer; em_parsedata() points it at epo.nbuffer
- -> mswvers - Word version found by em_begin(), later passed to em_parsedata()
- DEF mem2:PTR TO CHAR, mswvers
-
-
- -> Detect a Word document and return its version number.
- ->
- -> memadr - address of the file contents
- -> lenadr - number of valid bytes at memadr
- ->
- -> Returns the number that follows the 'Word.Document.' signature (compared
- -> case-insensitively apart from the leading "W"), or 0 when the buffer does
- -> not start with the $D0CF11E0 header or no signature is found.
- PROC em_isdatatype(memadr, lenadr)
-   DEF tstr[20]:STRING
-   DEF mswordfmtcount = 0
-   DEF notdone = TRUE
-   DEF mswordvers = 0
-   DEF lastpos
-
-   -> The header test reads one LONG, so at least four bytes are required
-   IF lenadr < 4 THEN RETURN 0
-
-   -> Find Word header
-   IF Long(memadr) = $D0CF11E0
-     -> 'Word.Document.' is 14 characters and the version digit follows at
-     -> offset 14, so a match must start no later than lenadr - 15 for the
-     -> digit to lie inside the buffer.
-     lastpos := lenadr - 15
-     WHILE notdone AND (mswordfmtcount <= lastpos)
-       IF memadr[mswordfmtcount] = "W"
-         -> Only the 13 characters that are compared get copied
-         StrCopy(tstr, memadr + mswordfmtcount, 13)
-         LowerStr(tstr)
-         IF InStr(tstr, 'word.document') = 0
-           -> NOTE(review): Val() reads on until the first non-digit; a digit
-           -> in the very last byte of the buffer would make it look one byte
-           -> further.
-           mswordvers := Val(memadr + mswordfmtcount + 14) -> Get word version
-           notdone := FALSE
-         ENDIF
-       ENDIF
-       INC mswordfmtcount
-     ENDWHILE
-   ENDIF
- ENDPROC mswordvers
-
-
- -> Convert the text stream of a Word document into plain text.
- ->
- -> epo      - plugin object; reads epo.buffer (input bytes), epo.length
- ->            (input size) and epo.nbuffer (output buffer, kept in mem2)
- -> wordvers - Word version as returned by em_isdatatype(); only the values
- ->            6 and 8 change the handling of NUL bytes
- ->
- -> Returns the output buffer and the number of bytes written to it. A CR is
- -> appended and counted; the LF stored after it is not included in the count.
- ->
- -> Note: in E strings "\b" is CR ($0D) and "\n" is LF.
- -> NOTE(review): the one- and two-byte look-aheads (memadr[count + 1],
- -> memadr[count + 2]) and the StrCopy() into tempstr are not checked against
- -> lenadr and can read slightly past the input on the last few bytes.
- PROC em_parsedata(epo:PTR TO em_pluginobj, wordvers)
-   DEF memadr:PTR TO CHAR, lenadr
-   DEF count = 0, count2 = 0, tempstr[10]:STRING
-   DEF wrapping = 80, notdonewrapping = TRUE, currchar = 0
-   DEF notdone = TRUE, lastcount
-   DEF wrapcount, wrapcount2, backed
-
-   memadr := epo.buffer
-   lenadr := epo.length
-
-   mem2 := epo.nbuffer
-
-   -> FIND START OF TEXT
-   -> Look for a CR whose run of preceding non-NUL bytes starts at offset
-   -> 1280, 1536 or 2560; that offset becomes the start of the text.
-   WHILE notdone AND (count < lenadr)
-     -> Forward to the next CR, never leaving the buffer
-     WHILE count < lenadr
-       EXIT (memadr[count] = "\b")
-       INC count
-     ENDWHILE
-
-     IF count < lenadr
-       lastcount := count
-       -> Back to the NUL before this run, never going below offset 0
-       WHILE count > 0
-         EXIT (memadr[count] = $00)
-         DEC count
-       ENDWHILE
-       INC count
-
-       SELECT count
-       CASE 1280; notdone := FALSE
-       CASE 1536; notdone := FALSE
-       CASE 2560; notdone := FALSE
-       DEFAULT; count := lastcount + 1
-       ENDSELECT
-     ENDIF
-   ENDWHILE
-
-   notdone := TRUE
-
-   WHILE notdone AND (count < lenadr)
-     -> WORD WRAP: once `wrapping` characters are on the current line, step
-     -> back to the previous whitespace and break the line there.
-     IF (currchar >= wrapping)
-       wrapcount := count
-       wrapcount2 := count2
-       backed := 0
-       WHILE notdonewrapping
-         SELECT 256 OF memadr[count]
-         CASE $00
-           DEC count
-         DEFAULT
-           IF isspace(memadr[count]) = FALSE
-             DEC count
-             DEC count2
-             INC backed
-           ELSE
-             notdonewrapping := FALSE
-           ENDIF
-         ENDSELECT
-         -> No whitespace anywhere in the current line (or start of input
-         -> reached): break the line where it is instead of re-scanning the
-         -> same long word forever.
-         IF notdonewrapping AND ((backed >= currchar) OR (count <= 0))
-           count := wrapcount - 1
-           count2 := wrapcount2
-           notdonewrapping := FALSE
-         ENDIF
-       ENDWHILE
-       INC count
-
-       currchar := 0
-       mem2[count2++] := "\b"
-       mem2[count2++] := "\n"
-       notdonewrapping := TRUE
-     ENDIF
-
-     SELECT 256 OF memadr[count]
-
-     -> 00
-     CASE $00
-       -> Version 8 skips NUL bytes; version 6 treats one as end of text
-       SELECT wordvers
-       CASE 8
-       CASE 6
-         -> Drop the last character written, if there is one
-         IF count2 > 0 THEN DEC count2
-         notdone := FALSE
-       ENDSELECT
-
-     -> A with leg
-     CASE $05
-       mem2[count2++] := "a"
-       INC currchar
-
-     -> CR
-     CASE $0d ->"\b"
-       -> CR followed by two NUL bytes ends the text
-       IF (memadr[count + 1] = $00)
-         IF (memadr[count + 2] = $00)
-           notdone := FALSE
-         ENDIF
-       ENDIF
-       mem2[count2++] := memadr[count]
-       mem2[count2++] := "\n" -> Add LF for printing compatibility
-       currchar := 0
-
-     -> ?
-     CASE $15
-
-     -> e with tilde
-     CASE $19
-       mem2[count2++] := "e"
-       INC currchar
-
-     -> Hidden forms ESC
-     CASE $13
-       INC count
-       -> EXTRA LONG DASH
-
-       IF memadr[count + 1] = $20
-         mem2[count2++] := "-"
-         INC count
-       ELSE
-         StrCopy(tempstr, memadr + count)
-         IF InStr(tempstr, 'HYPERLINK')>=0
-           -> Emit ESC[4;33m ahead of the hyperlink text
-           mem2[count2++] := "\e"
-           mem2[count2++] := "["
-           mem2[count2++] := "4"
-           mem2[count2++] := ";"
-           mem2[count2++] := "3"
-           mem2[count2++] := "3"
-           mem2[count2++] := "m"
-         ENDIF
-
-         -> Skip to the $14 byte, or stop at the end of the input if the
-         -> field is never closed
-         WHILE count < lenadr
-           EXIT (memadr[count] = $14)
-           INC count
-         ENDWHILE
-       ENDIF
-
-     -> "
-     CASE $93, $94
-       mem2[count2++] := $22
-       INC currchar
-
-     -> ...
-     CASE $85
-       mem2[count2++] := "."
-       mem2[count2++] := "."
-       mem2[count2++] := "."
-       currchar := currchar + 3
-
-     -> BOX
-     CASE "ð"
-       IF memadr[count-1] = "p"
-         DEC count2
-         mem2[count2++] := "O"
-         INC currchar
-       ENDIF
-
-     -> $0c TITLES
-     CASE $0c
-       -> Emit ESC[1m; each further $0c in the INPUT adds ";2" then ";3".
-       -> The look-ahead has to read memadr: mem2 is the output buffer and
-       -> count is an input position.
-       mem2[count2++] := "\e"
-       mem2[count2++] := "["
-       mem2[count2++] := "1"
-       -> Big title
-       IF memadr[count+1] = $0c
-         mem2[count2++] := ";"
-         mem2[count2++] := "2"
-         INC count
-         IF memadr[count+1] = $0c
-           mem2[count2++] := ";"
-           mem2[count2++] := "3"
-           INC count
-         ENDIF
-       ENDIF
-       mem2[count2++] := "m"
-
-     -> Special chars
-     CASE $08
-
-     -> ?
-     CASE $01
-
-     -> APOSTROPHE
-     CASE $91
-       mem2[count2++] := "`"
-       INC currchar
-
-     CASE $92
-       mem2[count2++] := "'"
-       INC currchar
-
-     -> DASH
-     CASE $96
-       mem2[count2++] := "-"
-       INC currchar
-
-     -> More foreign chars
-     CASE "B"
-       SELECT 256 OF memadr[count + 1]
-       -> L with a dash through it
-       CASE $01
-         mem2[count2++] := "l"
-         INC currchar
-         INC count
-       DEFAULT
-         mem2[count2++] := "B"
-         INC currchar
-       ENDSELECT
-
-     -> TABLE
-     CASE $07
-       -> Break line
-
-       SELECT 256 OF memadr[count + 1]
-       -> e with tilde
-       CASE $01
-         mem2[count2++] := "é"
-         INC currchar
-         INC count
-       -> End of cell in table
-       CASE $07
-         -> If another tab broken by $00, break line
-
-         mem2[count2++] := "\b"
-         currchar := 0
-         -> Skip on to next table
-         WHILE memadr[count + 1] = $07 DO INC count
-       CASE $00
-         IF memadr[count + 2] = $07
-           -> wee break
-           mem2[count2++] := "\b"
-           currchar := 0
-           count := count + 2
-           IF memadr[count + 1] = $00
-             -> big break
-             INC count
-             IF memadr[count + 2] = $07
-               INC count
-             ENDIF
-           ENDIF
-         ELSE
-           -> Skip to next text
-           WHILE memadr[count + 1] = $00 DO INC count
-           mem2[count2++] := "\t"
-           INC currchar
-         ENDIF
-       -> Tab to show split between table
-       DEFAULT
-         mem2[count2++] := "\t"
-         INC currchar
-       ENDSELECT
-
-     -> SIMILAR TABLE CHAR TO 07
-     CASE $0B
-       mem2[count2++] := "\b"
-       mem2[count2++] := "\t"
-       currchar := 1
-
-     -> Everything else is copied through unchanged
-     DEFAULT
-       mem2[count2++] := memadr[count]
-       INC currchar
-     ENDSELECT
-     INC count
-
-   ENDWHILE
-
-   -> Finish with CR (counted) and LF (stored but not counted)
-   mem2[count2++] := "\b"
-   mem2[count2] := "\n"
-
- ENDPROC mem2, count2
- -><
-
- -> *** STANDARD PROCS FOR PLUGINS
-
-
- -> Plugin entry point: convert the document held in epo.
- -> Runs em_parsedata() with the Word version stored in mswvers by em_begin()
- -> and records the resulting output length in epo.nlength.
- -> Always returns TRUE.
- PROC em_main(epo:PTR TO em_pluginobj)
-   DEF outlen
-
-   mem2, outlen := em_parsedata(epo, mswvers)
-   epo.nlength := outlen
- ENDPROC TRUE
-
- -> Decide whether this plugin handles the file in epo.
- -> Stores the detected Word version in mswvers. For versions 6 and 8 it
- -> returns "MEM" together with twice the input length; for anything else it
- -> returns FALSE.
- -> NOTE(review): "MEM" plus a size presumably asks the host to allocate
- -> epo.nbuffer with that many bytes - confirm against the plugin interface.
- PROC em_begin(epo:PTR TO em_pluginobj)
-   mswvers := em_isdatatype(epo.buffer, epo.length)
-
-   -> Any version other than 6 or 8 is rejected
-   IF (mswvers <> 8) AND (mswvers <> 6) THEN RETURN FALSE
- ENDPROC "MEM", (epo.length * 2)
-
- -> Exported counterpart of em_begin(); this plugin does no work here.
- PROC em_end() IS EMPTY
-
- -> Returns the plugin's name and version as a string.
- PROC em_info()
- ENDPROC 'MSWord2Text 1.31'
-
- -> Returns the four-character plugin type identifier "FILE".
- PROC em_pluginid()
- ENDPROC "FILE"
-
- -> Returns the name of the file format this plugin handles.
- PROC em_format()
- ENDPROC 'MSWord'
-
- -> main() is empty: the file is built as a library (see the LIBRARY line),
- -> so all work is done by the exported em_* procedures.
- PROC main() IS EMPTY
-